Quality Control & EDA

All samples

Sample clustering

sampleDists <- dist(t(assay(rld)))
plot(hclust(sampleDists))

Correlation Heatmap

annot = dplyr::select(experimental_metadata, condition)
row.names(annot) = experimental_metadata$sample_id
rld %>%
  assay() %>%
  cor() %>%
  pheatmap(color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
           annotation = annot,
           annotation_colors = list(condition = c(Control = "#BF3100", OKSM = "#E9B44C", 
                                                  Senolytic = "#1B998B", Senolytic_OKSM = "#5D576B")),
           cluster_rows = TRUE,
           cluster_cols = T,
           cellwidth = 13,
           cellheight = 13)

PCA

ntop = 500
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca = prcomp(t(assay(rld)[select,]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)

pca_data <- plotPCA(rld, intgroup = c("condition", "replicate"), returnData=TRUE)
percentVar <- round(100 * attr(pca_data, "percentVar"), digits=2)
ggplot(pca_data, aes(PC1, PC2, color=condition, shape=replicate)) + geom_point(size=3) +
  scale_x_continuous(paste0("PC1: ",percentVar[1],"% variance")) +
  scale_y_continuous(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed() + theme_classic() + geom_text(data = pca_data, aes(PC1,PC2, label = name), hjust = 1.2)

Removing TPM Outliers

effective_lengths = matrix(0, ncol=length(experimental_metadata$sample_id), nrow=17714)
colnames(effective_lengths)= experimental_metadata$sample_id
for( i in experimental_metadata$sample_id){
  effective_lengths[,i] = read.table(paste("data/ipsc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$effective_length
}
row.names(effective_lengths) = read.table(paste("data/ipsc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$gene_id

effective_lengths = rowMeans(effective_lengths[row.names(counts(dds)),])
ncrpk = counts(dds) / (effective_lengths / 1000)
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.nan(x)){0}else{x}})
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.infinite(x)){0}else{x}})
ncscalingfactor = colSums(ncrpk) / 1e6
nctpm = sweep(ncrpk, 2, ncscalingfactor, "/")

nctpm.melt = melt(nctpm)
ggplot(nctpm.melt, aes(x=Var2, y=value)) + geom_boxplot() + theme_classic() + theme(axis.text.x = element_text(angle = 90, colour="black", hjust = 1)) + scale_x_discrete("Sample") + scale_y_continuous("TPM")

tpm.threshold = 20000
test.tpm = apply(nctpm, 1, function(x){ any(x> tpm.threshold) })
ensembl.genes[names(test.tpm[test.tpm])] %>%
  as.data.frame() %>%
  datatable(options = list(scrollX = TRUE))

Differential Expression Analysis

Wald Tests

OKSM vs Control(TdTom)

generate_de_section(dds_wald, "OKSM", "Control")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1258"

Senolytic vs Control(TdTom)

generate_de_section(dds_wald, "Senolytic", "Control")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1399"

Senolytic OKSM vs Control(TdTom)

generate_de_section(dds_wald, "Senolytic_OKSM", "Control")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1311"

OKSM vs Senolytic

generate_de_section(dds_wald, "OKSM", "Senolytic")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 124"

OKSM vs Senolytic_OKSM

generate_de_section(dds_wald, "OKSM", "Senolytic_OKSM")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 109"

Senolytic vs Senolytic_OKSM

generate_de_section(dds_wald, "Senolytic", "Senolytic_OKSM")
## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 0"

Likelihood Ratio Test

dds_LRT = nbinomLRT(dds, reduced = ~1)
res = results(dds_LRT)

res$gene_biotype= ensembl.genes$gene_biotype[match(row.names(res), ensembl.genes$gene_id)]
res$external_gene_name= ensembl.genes$external_gene_name[match(row.names(res), ensembl.genes$gene_id)]

hist(res$pvalue)

Number of significant genes (padj < 0.1):

sum(res$padj < 0.1, na.rm=T)
## [1] 2967

Visualisation

# assay(x) to access the count data
sig_mat_rld = assay(significant_rld)

# The apply function swaps the rows to samples and columns to genes -- the standard is the other way around: samples in cols and genes in rows, hence the transpose function
zscores = t(apply(sig_mat_rld, 1, function(x){ (x - mean(x)) / sd(x) }))

Elbow Plot

foo = as(zscores, "matrix")
bar = sapply(1:10, function(x){kmeans(foo, centers=x)$tot.withinss})
plot(bar, type="l")

Heatmap

pam_clust <- generate_data(zscores, 7, "pam")
# pam_clust <- as.data.frame(pam_clust)
# pam_clust$Cluster <- factor(pam_clust$Cluster, levels = c(5,4,3,1,2,6))
# pam_clust <- pam_clust[order(pam_clust$Cluster),]

pheatmap(pam_clust[,1:(ncol(pam_clust)-1)],
         color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         fontsize_row = 5.5,
         annotation_col = annotation,
         annotation_colors = anno_colours,
         cluster_rows = FALSE,
         cluster_cols = FALSE)

Table of Genes

pam_clust_df <- pam_clust %>%
  as.data.frame() %>%
  mutate(gene_name = ensembl.genes[rownames(.),]$external_gene_name) %>%
  dplyr::select(gene_name, Cluster) %>%
  dplyr::rename("Gene Name" = gene_name)
datatable(pam_clust_df, options = list(scrollX = TRUE), class = "white-space: nowrap")

Number of Genes

c1 <- pam_clust_df[pam_clust_df$Cluster == 1, ] %>%
  dplyr::select(-Cluster)
saveRDS(c1, "output/ipsc/ipsc_c1.rds")

c2 <- pam_clust_df[pam_clust_df$Cluster == 2, ] %>%
  dplyr::select(-Cluster)
saveRDS(c2, "output/ipsc/ipsc_c2.rds")

c3 <- pam_clust_df[pam_clust_df$Cluster == 3, ] %>%
  dplyr::select(-Cluster)
saveRDS(c3, "output/ipsc/ipsc_c3.rds")

c4 <- pam_clust_df[pam_clust_df$Cluster == 4, ] %>%
  dplyr::select(-Cluster)
saveRDS(c4, "output/ipsc/ipsc_c4.rds")

c5 <- pam_clust_df[pam_clust_df$Cluster == 5, ] %>%
  dplyr::select(-Cluster)
saveRDS(c5, "output/ipsc/ipsc_c5.rds")

c6 <- pam_clust_df[pam_clust_df$Cluster == 6, ] %>%
  dplyr::select(-Cluster)
saveRDS(c6, "output/ipsc/ipsc_c6.rds")

c7 <- pam_clust_df[pam_clust_df$Cluster == 7, ] %>%
  dplyr::select(-Cluster)
saveRDS(c7, "output/ipsc/ipsc_c7.rds")

data.frame(Cluster = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", 
                       "Cluster 5", "Cluster 6", "Cluster 7", "Total"),
                        Number_of_genes = c(nrow(c1), nrow(c2), nrow(c3), nrow(c4),
                                            nrow(c5), nrow(c6), nrow(c7),
                                            sum(c(nrow(c1),nrow(c2),nrow(c3),nrow(c4),
                                                  nrow(c5),nrow(c6),nrow(c7)))))
##     Cluster Number_of_genes
## 1 Cluster 1             773
## 2 Cluster 2             397
## 3 Cluster 3             128
## 4 Cluster 4             566
## 5 Cluster 5             613
## 6 Cluster 6             248
## 7 Cluster 7             242
## 8     Total            2967

Silhouette Plot

## Checking the stability of clusters
#fviz_nbclust(zscores, kmeans, method = "silhouette")
set.seed(2)
dd = as.dist((1 - cor(t(zscores)))/2)
pam = pam(dd, 7, diss = TRUE)
sil = silhouette(pam$clustering, dd)
plot(sil, border=NA, main = "Silhouette Plot for 7 Clusters")

Functional Enrichments

#listEnrichrSites()
setEnrichrSite("FlyEnrichr")
dbs <- listEnrichrDbs()
to_check <- c("GO_Biological_Process_2018", "KEGG_2019")

Cluster 1

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 2

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 3

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 4

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 5

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 6

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Cluster 7

GO_Biological_Process_2018

eresList$GO_Biological_Process_2018 %>%
  plot_enrichr("GO_Biological_Process_2018")

datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")

KEGG_2019

eresList$KEGG_2019 %>%
  plot_enrichr("KEGG_2019") 

datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")

Session Info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] enrichR_3.0                           DT_0.19                              
##  [3] RColorBrewer_1.1-2                    scales_1.1.1                         
##  [5] reshape2_1.4.4                        knitr_1.36                           
##  [7] biomaRt_2.50.0                        GenomicFeatures_1.46.1               
##  [9] AnnotationDbi_1.56.2                  genefilter_1.76.0                    
## [11] DESeq2_1.34.0                         SummarizedExperiment_1.24.0          
## [13] Biobase_2.54.0                        MatrixGenerics_1.6.0                 
## [15] matrixStats_0.61.0                    BSgenome.Dmelanogaster.UCSC.dm6_1.4.1
## [17] BSgenome_1.62.0                       rtracklayer_1.54.0                   
## [19] GenomicRanges_1.46.0                  Biostrings_2.62.0                    
## [21] GenomeInfoDb_1.30.0                   XVector_0.34.0                       
## [23] IRanges_2.28.0                        S4Vectors_0.32.2                     
## [25] BiocGenerics_0.40.0                   forcats_0.5.1                        
## [27] stringr_1.4.0                         dplyr_1.0.7                          
## [29] purrr_0.3.4                           readr_2.0.2                          
## [31] tidyr_1.1.4                           tibble_3.1.6                         
## [33] tidyverse_1.3.1                       EnhancedVolcano_1.12.0               
## [35] ggrepel_0.9.1                         ggplot2_3.3.5                        
## [37] pheatmap_1.0.12                       cluster_2.1.2                        
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1             backports_1.3.0          BiocFileCache_2.2.0     
##   [4] plyr_1.8.6               splines_4.1.2            crosstalk_1.2.0         
##   [7] BiocParallel_1.28.0      digest_0.6.28            htmltools_0.5.2         
##  [10] fansi_0.5.0              magrittr_2.0.1           memoise_2.0.0           
##  [13] tzdb_0.2.0               annotate_1.72.0          modelr_0.1.8            
##  [16] extrafont_0.17           extrafontdb_1.0          prettyunits_1.1.1       
##  [19] colorspace_2.0-2         blob_1.2.2               rvest_1.0.2             
##  [22] rappdirs_0.3.3           haven_2.4.3              xfun_0.29               
##  [25] crayon_1.4.2             RCurl_1.98-1.5           jsonlite_1.7.2          
##  [28] survival_3.2-13          glue_1.5.0               gtable_0.3.0            
##  [31] zlibbioc_1.40.0          DelayedArray_0.20.0      proj4_1.0-10.1          
##  [34] car_3.0-12               Rttf2pt1_1.3.9           maps_3.4.0              
##  [37] abind_1.4-5              DBI_1.1.1                rstatix_0.7.0           
##  [40] Rcpp_1.0.7               xtable_1.8-4             progress_1.2.2          
##  [43] bit_4.0.4                htmlwidgets_1.5.4        httr_1.4.2              
##  [46] ellipsis_0.3.2           farver_2.1.0             pkgconfig_2.0.3         
##  [49] XML_3.99-0.8             sass_0.4.0               dbplyr_2.1.1            
##  [52] locfit_1.5-9.4           utf8_1.2.2               labeling_0.4.2          
##  [55] tidyselect_1.1.1         rlang_0.4.12             munsell_0.5.0           
##  [58] cellranger_1.1.0         tools_4.1.2              cachem_1.0.6            
##  [61] cli_3.1.0                generics_0.1.1           RSQLite_2.2.8           
##  [64] broom_0.7.10             evaluate_0.14            fastmap_1.1.0           
##  [67] yaml_2.2.1               bit64_4.0.5              fs_1.5.0                
##  [70] KEGGREST_1.34.0          ash_1.0-15               ggrastr_1.0.1           
##  [73] xml2_1.3.2               compiler_4.1.2           rstudioapi_0.13         
##  [76] beeswarm_0.4.0           filelock_1.0.2           curl_4.3.2              
##  [79] png_0.1-7                ggsignif_0.6.3           reprex_2.0.1            
##  [82] geneplotter_1.72.0       bslib_0.3.1              stringi_1.7.5           
##  [85] highr_0.9                ggalt_0.4.0              lattice_0.20-45         
##  [88] Matrix_1.3-4             vctrs_0.3.8              pillar_1.6.4            
##  [91] lifecycle_1.0.1          jquerylib_0.1.4          bitops_1.0-7            
##  [94] R6_2.5.1                 BiocIO_1.4.0             KernSmooth_2.23-20      
##  [97] vipor_0.4.5              MASS_7.3-54              assertthat_0.2.1        
## [100] rprojroot_2.0.2          rjson_0.2.20             withr_2.4.2             
## [103] GenomicAlignments_1.30.0 Rsamtools_2.10.0         GenomeInfoDbData_1.2.7  
## [106] parallel_4.1.2           hms_1.1.1                grid_4.1.2              
## [109] rmarkdown_2.11           carData_3.0-4            ggpubr_0.4.0            
## [112] lubridate_1.8.0          ggbeeswarm_0.6.0         restfulr_0.0.13